import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
# Global plot look: seaborn's default theme with the "crest" palette.
sns.set_theme()
sns.color_palette("crest", as_cmap=True)  # return value is not stored
sns.set_palette("crest")

# Silence all warnings raised from here on.
import warnings
warnings.simplefilter("ignore")

# Load the scraped Kickstarter data and preview the first rows.
csv_path = "../data/DSI_kickstarterscrape_dataset.csv"
df = pd.read_csv(csv_path)
df.head()
| project id | name | url | category | subcategory | location | status | goal | pledged | funded percentage | backers | funded date | levels | reward levels | updates | comments | duration | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 39409 | WHILE THE TREES SLEEP | http://www.kickstarter.com/projects/emiliesaba... | Film & Video | Short Film | Columbia, MO | successful | 10500.0 | 11545.0 | 1.099524 | 66 | Fri, 19 Aug 2011 19:28:17 -0000 | 7 | $25,$50,$100,$250,$500,$1,000,$2,500 | 10 | 2 | 30.00 |
| 1 | 126581 | Educational Online Trading Card Game | http://www.kickstarter.com/projects/972789543/... | Games | Board & Card Games | Maplewood, NJ | failed | 4000.0 | 20.0 | 0.005000 | 2 | Mon, 02 Aug 2010 03:59:00 -0000 | 5 | $1,$5,$10,$25,$50 | 6 | 0 | 47.18 |
| 2 | 138119 | STRUM | http://www.kickstarter.com/projects/185476022/... | Film & Video | Animation | Los Angeles, CA | live | 20000.0 | 56.0 | 0.002800 | 3 | Fri, 08 Jun 2012 00:00:31 -0000 | 10 | $1,$10,$25,$40,$50,$100,$250,$1,000,$1,337,$9,001 | 1 | 0 | 28.00 |
| 3 | 237090 | GETTING OVER - One son's search to finally kno... | http://www.kickstarter.com/projects/charnick/g... | Film & Video | Documentary | Los Angeles, CA | successful | 6000.0 | 6535.0 | 1.089167 | 100 | Sun, 08 Apr 2012 02:14:00 -0000 | 13 | $1,$10,$25,$30,$50,$75,$85,$100,$110,$250,$500... | 4 | 0 | 32.22 |
| 4 | 246101 | The Launch of FlyeGrlRoyalty "The New Nam... | http://www.kickstarter.com/projects/flyegrlroy... | Fashion | Fashion | Novi, MI | failed | 3500.0 | 0.0 | 0.000000 | 0 | Wed, 01 Jun 2011 15:25:39 -0000 | 6 | $10,$25,$50,$100,$150,$250 | 2 | 0 | 30.00 |
# Data cleaning: normalise ampersands in the category / subcategory labels.
#
# The previous version wrote through chained indexing (df[col][mask] = value),
# which pandas does not guarantee to write back into df, and each line
# compared against and assigned the very same string, so nothing could change.
# NOTE(review): presumably the raw CSV carries the HTML entity "&amp;" in
# labels such as "Film & Video" - confirm against the data. Replacing the
# entity across the whole column leaves the data untouched when it is absent.
for col in ("category", "subcategory"):
    df[col] = df[col].str.replace("&amp;", "&", regex=False)
df["subcategory"].unique()
array(['Short Film', 'Board & Card Games', 'Animation', 'Documentary',
'Fashion', 'Music', 'Illustration', 'Film & Video',
'Open Software', 'Indie Rock', 'Dance', 'Fiction', 'Nonfiction',
'Theater', 'Games', 'Art Book', 'Country & Folk', 'Comics',
'Webseries', 'Technology', 'Performance Art', 'Narrative Film',
'Video Games', 'Product Design', 'Rock', 'Painting', 'Photography',
'Conceptual Art', 'Jazz', 'Open Hardware', 'Classical Music',
'Food', 'Art', 'Pop', 'Journalism', 'Poetry', 'Electronic Music',
'World Music', 'Sculpture', 'Publishing', "Children's Book",
'Public Art', 'Mixed Media', 'Graphic Design', 'Hip-Hop',
'Periodical', 'Crafts', 'Design', 'Digital Art'], dtype=object)
# Distinct campaign outcomes present in the data.
pd.unique(df["status"])
array(['successful', 'failed', 'live', 'canceled', 'suspended'],
dtype=object)
# Feature engineering: funding gap, calendar parts of the funding date and a
# combined "category - subcategory" label.
df["difference between goal and pledge"] = df["goal"] - df["pledged"]

# "funded date" looks like "Fri, 19 Aug 2011 19:28:17 -0000". The weekday sits
# before the comma; splitting the remainder on single spaces gives
# ["", day, month, year, time, offset], so the useful tokens are at 1..4.
# The string is split once and reused instead of being re-split per column.
date_halves = df["funded date"].str.split(",")
df["weekday"] = date_halves.str[0]
date_tokens = date_halves.str[1].str.split(" ")
df["day"] = date_tokens.str[1].str.strip().astype("int64")
df["month"] = date_tokens.str[2].str.strip()
df["year"] = date_tokens.str[3].str.strip().astype("int64")
df["time"] = date_tokens.str[4].str.strip()

# Build "day-Mon-year HH:MM:SS" with vectorised string concatenation (rather
# than a row-by-row join) and parse it into timestamps.
df["datetime"] = pd.to_datetime(
    df["day"].astype(str) + "-" + df["month"] + "-" + df["year"].astype(str)
    + " " + df["time"]
)
df["hour"] = pd.DatetimeIndex(df["datetime"]).hour

months_dict = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
               "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12}
# map() yields integer month numbers directly; replace() on a string column
# only produced integers through dtype downcasting, which pandas deprecated.
# An abbreviation missing from months_dict becomes NaN instead of being kept.
df["month numeric"] = df["month"].map(months_dict)

df["category and subcategory"] = df["category"] + " - " + df["subcategory"]
df["category and subcategory"].unique()
array(['Film & Video - Short Film', 'Games - Board & Card Games',
'Film & Video - Animation', 'Film & Video - Documentary',
'Fashion - Fashion', 'Music - Music', 'Art - Illustration',
'Film & Video - Film & Video', 'Technology - Open Software',
'Music - Indie Rock', 'Dance - Dance', 'Publishing - Fiction',
'Publishing - Nonfiction', 'Theater - Theater', 'Games - Games',
'Publishing - Art Book', 'Music - Country & Folk',
'Comics - Comics', 'Film & Video - Webseries',
'Technology - Technology', 'Art - Performance Art',
'Film & Video - Narrative Film', 'Games - Video Games',
'Design - Product Design', 'Music - Rock', 'Art - Painting',
'Photography - Photography', 'Art - Conceptual Art',
'Music - Jazz', 'Technology - Open Hardware',
'Music - Classical Music', 'Food - Food', 'Art - Art',
'Music - Pop', 'Publishing - Journalism', 'Publishing - Poetry',
'Music - Electronic Music', 'Music - World Music',
'Art - Sculpture', 'Publishing - Publishing',
"Publishing - Children's Book", 'Art - Public Art',
'Art - Mixed Media', 'Design - Graphic Design', 'Music - Hip-Hop',
'Publishing - Periodical', 'Design - Crafts', 'Design - Design',
'Art - Digital Art'], dtype=object)
# Order rows by category, then subcategory (both ascending).
df = df.sort_values(by=["category", "subcategory"], ascending=True)
pd.set_option("display.max_columns", 0)
# Capitalise every column label: first character upper-case, rest lower-case.
df.columns = [label.capitalize() for label in df.columns]
df.head()
| Project id | Name | Url | Category | Subcategory | Location | Status | Goal | Pledged | Funded percentage | Backers | Funded date | Levels | Reward levels | Updates | Comments | Duration | Difference between goal and pledge | Weekday | Day | Month | Year | Time | Datetime | Hour | Month numeric | Category and subcategory | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 96 | 4684967 | Brooklyn Pop-Up Show: VULNERABLE | http://www.kickstarter.com/projects/229collect... | Art | Art | Brooklyn, NY | successful | 500.0 | 563.0 | 1.12600 | 25 | Fri, 06 Apr 2012 16:31:08 -0000 | 4 | $5,$25,$50,$150 | 2 | 0 | 27.96 | -63.0 | Fri | 6 | Apr | 2012 | 16:31:08 | 2012-04-06 16:31:08 | 16 | 4 | Art - Art |
| 100 | 4732285 | Insiders/Out: Exploring Outsider Art in America | http://www.kickstarter.com/projects/insidersou... | Art | Art | NaN | successful | 2000.0 | 2000.0 | 0.99983 | 30 | Thu, 08 Jul 2010 21:39:00 -0000 | 13 | $1,$5,$10,$15,$20,$25,$35,$50,$75,$100,$150,$2... | 4 | 1 | 42.97 | 0.0 | Thu | 8 | Jul | 2010 | 21:39:00 | 2010-07-08 21:39:00 | 21 | 7 | Art - Art |
| 190 | 9729076 | parallels: defining home | http://www.kickstarter.com/projects/981542607/... | Art | Art | Grand Rapids, MI | successful | 2500.0 | 2500.0 | 1.00000 | 33 | Sat, 05 Feb 2011 04:44:57 -0000 | 7 | $10,$25,$50,$75,$150,$300,$500 | 1 | 0 | 12.00 | 0.0 | Sat | 5 | Feb | 2011 | 04:44:57 | 2011-02-05 04:44:57 | 4 | 2 | Art - Art |
| 193 | 9993639 | Greenspirit Arts RE-LAUNCH of Limited-Edition ... | http://www.kickstarter.com/projects/551175469/... | Art | Art | Westport, NY | successful | 8000.0 | 8940.0 | 1.11750 | 74 | Mon, 21 Mar 2011 03:59:00 -0000 | 11 | $1,$5,$10,$20,$40,$60,$100,$250,$475,$500,$600 | 25 | 5 | 39.37 | -940.0 | Mon | 21 | Mar | 2011 | 03:59:00 | 2011-03-21 03:59:00 | 3 | 3 | Art - Art |
| 231 | 11201727 | Inspiring and Lifting Lives... Through Art! | http://www.kickstarter.com/projects/aa2d/most-... | Art | Art | Austin, TX | failed | 750.0 | 18.0 | 0.02400 | 3 | Wed, 12 Oct 2011 05:16:35 -0000 | 11 | $1,$5,$12,$20,$50,$100,$325,$750,$1,000,$2,000... | 1 | 0 | 21.00 | 732.0 | Wed | 12 | Oct | 2011 | 05:16:35 | 2011-10-12 05:16:35 | 5 | 10 | Art - Art |
# White background, light-grey grid and edges, larger fonts for all plots.
plot_rc = {
    "axes.facecolor": "white",
    "figure.facecolor": "white",
    "grid.color": "#d9d9d9",
    "axes.edgecolor": "#d9d9d9",
    "font.size": 16,
    "axes.labelsize": 18,
    "xtick.labelsize": 16,
    "ytick.labelsize": 16,
}
sns.set(rc=plot_rc)

# Custom colour cycle used by the plots that follow.
colors = ["#7EAF92", "#fc8b8b", "#8DDAD6", "#faeb87", "#f5efcb"]
sns.set_palette(sns.color_palette(colors))

# Horizontal count of projects per status, with the count written on each bar.
fig, ax = plt.subplots(figsize=(25, 8))
count_status = sns.countplot(y="Status", data=df, ax=ax)
ax.bar_label(count_status.containers[0], padding=4)
ax.set_xlabel("Count")
Text(0.5, 0, 'Count')
# Separate successful and failed projects.
df_successful = df[df["Status"] == "successful"]
df_failed = df[df["Status"] == "failed"]

# Overlaid histograms of the funded percentage, zoomed to the 0-2 range.
fig, ax = plt.subplots(figsize=(15, 8))
percentage_both = [df_successful["Funded percentage"], df_failed["Funded percentage"]]
# Labels are listed in the same order as the datasets and handed to hist(),
# so each label is attached to its own data. The previous version passed a
# reversed label list to legend(), which was only correct because step-type
# histograms add their artists to the axes in reverse order.
labels = ["Successful", "Failed"]
y_hist = plt.hist(percentage_both, bins=1506600,
                  histtype="stepfilled", label=labels)
plt.xlim(0, 2)
plt.xlabel("Funded Percentage")
plt.ylabel("Count")
plt.legend(loc='upper right')
<matplotlib.legend.Legend at 0x2357bf6a340>
# Keep only finished campaigns: remove rows that are live, canceled or suspended.
unfinished = df["Status"].isin(["live", "canceled", "suspended"])
df = df.drop(index=df.loc[unfinished].index)
# The project identifier is not used past this point.
df = df.drop(columns="Project id")
# Number of successful projects funded at 150 % of their goal or more.
(df_successful["Funded percentage"] >= 1.5).sum()
3961
# Correlation heatmap for successful projects.
# iloc[:, 1:] skips the first column (the project id, which df_successful
# still carries because it was split off before that column was dropped).
# Only numeric/boolean columns are correlated: recent pandas versions raise
# when corr() meets string or datetime columns instead of skipping them.
f, ax = plt.subplots(figsize=(14, 14))
numeric_successful = df_successful.iloc[:, 1:].select_dtypes(include=["number", "bool"])
sns.heatmap(numeric_successful.corr(), square=True, annot=True, fmt='.3f')
<AxesSubplot:>
# Project counts per category: all finished projects, successful, failed.
subcategory_count_total = df["Category"].value_counts()
subcategory_count_s = df_successful["Category"].value_counts()
subcategory_count_f = df_failed["Category"].value_counts()
all_counts = pd.concat(
    [subcategory_count_total, subcategory_count_s, subcategory_count_f],
    axis=1,
    keys=["Total", "Successful", "Failed"],
)

# Extra row holding the column means across categories.
all_counts.loc["Average"] = all_counts.mean()

# Whole-number share of each outcome; the total is 100 by definition.
for outcome in ("Successful", "Failed"):
    share = all_counts[outcome] / all_counts["Total"] * 100
    all_counts[f"{outcome} Percentage"] = share.round(0).astype("int64")
all_counts["Total Percentage"] = 100

# Rank categories by success share while keeping "Average" as the last row.
all_counts_regular = all_counts.iloc[:-1].sort_values(by="Successful Percentage", ascending=False)
all_counts_average = all_counts.iloc[[-1]]
all_counts = pd.concat([all_counts_regular, all_counts_average], axis=0)
all_counts
| Total | Successful | Failed | Successful Percentage | Failed Percentage | Total Percentage | |
|---|---|---|---|---|---|---|
| Dance | 707.000000 | 527.000000 | 180.000000 | 75 | 25 | 100 |
| Theater | 2322.000000 | 1642.000000 | 680.000000 | 71 | 29 | 100 |
| Music | 10053.000000 | 6792.000000 | 3261.000000 | 68 | 32 | 100 |
| Art | 3691.000000 | 2106.000000 | 1585.000000 | 57 | 43 | 100 |
| Comics | 968.000000 | 520.000000 | 448.000000 | 54 | 46 | 100 |
| Film & Video | 12595.000000 | 6423.000000 | 6172.000000 | 51 | 49 | 100 |
| Food | 1298.000000 | 655.000000 | 643.000000 | 50 | 50 | 100 |
| Design | 1566.000000 | 729.000000 | 837.000000 | 47 | 53 | 100 |
| Photography | 1387.000000 | 645.000000 | 742.000000 | 47 | 53 | 100 |
| Games | 1467.000000 | 635.000000 | 832.000000 | 43 | 57 | 100 |
| Publishing | 4155.000000 | 1671.000000 | 2484.000000 | 40 | 60 | 100 |
| Technology | 737.000000 | 289.000000 | 448.000000 | 39 | 61 | 100 |
| Fashion | 1019.000000 | 335.000000 | 684.000000 | 33 | 67 | 100 |
| Average | 3228.076923 | 1766.846154 | 1461.230769 | 55 | 45 | 100 |
# Success share per category: full-height red bars form the "failed"
# background and the green success bars are drawn on top of them.
fig, ax = plt.subplots(figsize=(25, 6))
labels = ["Failed", "Successful"]
ax.bar(all_counts.index, all_counts["Total Percentage"], color="#fc8b8b")
ax.bar(all_counts.index, all_counts["Successful Percentage"], color="#7EAF92")
ax.set_ylim(0, 100)
ax.yaxis.set_major_locator(plt.MaxNLocator(10))
ax.legend(labels)
plt.xticks(rotation=90)
ax.set_xlabel("Category")
ax.set_ylabel("Percentage of success and failure")
Text(0, 0.5, 'Percentage of success and failure')
# Project counts per "category - subcategory" pair: all finished, successful, failed.
group_col = "Category and subcategory"
subcategory_count_total = df[group_col].value_counts()
subcategory_count_s = df_successful[group_col].value_counts()
subcategory_count_f = df_failed[group_col].value_counts()
all_counts = pd.concat(
    [subcategory_count_total, subcategory_count_s, subcategory_count_f],
    axis=1,
    keys=["Total", "Successful", "Failed"],
)

# Extra row holding the column means across all pairs.
all_counts.loc["Average"] = all_counts.mean()

# Whole-number share of each outcome; the total is 100 by definition.
success_share = all_counts["Successful"] / all_counts["Total"] * 100
failed_share = all_counts["Failed"] / all_counts["Total"] * 100
all_counts["Successful Percentage"] = success_share.round(0).astype("int64")
all_counts["Failed Percentage"] = failed_share.round(0).astype("int64")
all_counts["Total Percentage"] = 100

# Rank pairs by success share while keeping "Average" as the last row.
all_counts_regular = all_counts.iloc[:-1].sort_values(by="Successful Percentage", ascending=False)
all_counts_average = all_counts.iloc[[-1]]
all_counts = pd.concat([all_counts_regular, all_counts_average], axis=0)
all_counts
# Success share per subcategory: full-height red bars form the "failed"
# background and the green success bars are drawn on top of them.
fig, ax = plt.subplots(figsize=(25, 6))
labels = ["Failed", "Successful"]
ax.bar(all_counts.index, all_counts["Total Percentage"], color="#fc8b8b")
ax.bar(all_counts.index, all_counts["Successful Percentage"], color="#7EAF92")
ax.set_ylim(0, 100)
ax.yaxis.set_major_locator(plt.MaxNLocator(10))
ax.legend(labels)
plt.xticks(rotation=90)
ax.set_xlabel("Subcategory")
ax.set_ylabel("Percentage of success and failure")
Text(0, 0.5, 'Percentage of success and failure')
# Funded percentage of successful projects per category, with boxes ordered
# by ascending median.
fig, ax = plt.subplots(figsize=(25, 8))
median_by_category = df_successful.groupby("Category")["Funded percentage"].median()
my_order = median_by_category.sort_values().index
box_subcat = sns.boxplot(
    x="Category",
    y="Funded percentage",
    hue="Category",
    data=df_successful,
    order=my_order,
    palette="Spectral",
    dodge=False,
    showfliers=False,
    ax=ax,
)
# Dashed reference line at a funded percentage of 1.
ax.axhline(y=1, color="red", linewidth=2, alpha=.7, ls="--")
plt.xticks(rotation=90)
ax.set_ylim(1, 5)
ax.legend(loc="upper left")
my_order
Index(['Dance', 'Film & Video', 'Fashion', 'Theater', 'Food', 'Art', 'Music',
'Publishing', 'Photography', 'Comics', 'Design', 'Technology', 'Games'],
dtype='object', name='Category')
# Funded percentage of successful projects per subcategory, coloured by
# category, with boxes ordered by ascending median.
fig, ax = plt.subplots(figsize=(25, 8))
median_by_subcategory = df_successful.groupby("Subcategory")["Funded percentage"].median()
my_order = median_by_subcategory.sort_values().index
box_subcat = sns.boxplot(
    x="Subcategory",
    y="Funded percentage",
    hue="Category",
    data=df_successful,
    order=my_order,
    palette="Spectral",
    dodge=False,
    showfliers=False,
    ax=ax,
)
# Dashed reference line at a funded percentage of 1.
ax.axhline(y=1, color="red", linewidth=2, alpha=.7, ls="--")
plt.xticks(rotation=90)
ax.set_ylim(1, 7)
ax.legend(loc="upper left")
my_order
Index(['Narrative Film', 'Performance Art', 'Public Art', 'Short Film',
'Dance', 'Film & Video', 'Digital Art', 'Jazz', 'Mixed Media', 'Poetry',
'Documentary', 'Fashion', 'Webseries', 'Theater', 'Food',
'Children's Book', 'Music', 'Classical Music', 'World Music',
'Conceptual Art', 'Sculpture', 'Pop', 'Rock', 'Journalism',
'Publishing', 'Art', 'Indie Rock', 'Country & Folk', 'Hip-Hop',
'Electronic Music', 'Nonfiction', 'Photography', 'Crafts', 'Periodical',
'Animation', 'Open Software', 'Painting', 'Fiction', 'Design',
'Art Book', 'Illustration', 'Video Games', 'Comics', 'Technology',
'Product Design', 'Graphic Design', 'Board & Card Games', 'Games',
'Open Hardware'],
dtype='object', name='Subcategory')
# Goal minus pledged amount per subcategory, coloured by category.
fig, ax = plt.subplots(figsize=(25, 8))
box_subcat = sns.boxplot(
    x="Subcategory",
    y="Difference between goal and pledge",
    hue="Category",
    data=df,
    palette="Spectral",
    dodge=False,
    showfliers=False,
    ax=ax,
)
# Dashed line at zero, where the pledged amount equals the goal.
ax.axhline(y=0, color="grey", linewidth=2, alpha=.7, ls="--")
plt.xticks(rotation=90)
ax.set_ylim(-25000, 40000)
ax.legend(loc="upper left")
<matplotlib.legend.Legend at 0x23500ca2460>
# Funding goal per "category - subcategory" pair, coloured by category.
fig, ax = plt.subplots(figsize=(25, 8))
box_subcat = sns.boxplot(
    x="Category and subcategory",
    y="Goal",
    hue="Category",
    data=df,
    palette="Spectral",
    dodge=False,
    showfliers=False,
    ax=ax,
)
# Dashed grey line drawn at y = 1.
ax.axhline(y=1, color="grey", linewidth=2, alpha=.7, ls="--")
plt.xticks(rotation=90)
ax.set_ylim(0, 30000)
ax.legend(loc="upper left")
<matplotlib.legend.Legend at 0x23576435610>
# Pledged amount per "category - subcategory" pair, coloured by category.
fig, ax = plt.subplots(figsize=(25, 8))
box_subcat = sns.boxplot(
    x="Category and subcategory",
    y="Pledged",
    hue="Category",
    data=df,
    palette="Spectral",
    dodge=False,
    showfliers=False,
    ax=ax,
)
# Dashed grey line drawn at y = 1.
ax.axhline(y=1, color="grey", linewidth=2, alpha=.7, ls="--")
plt.xticks(rotation=90)
ax.set_ylim(0, 20000)
(0.0, 20000.0)
# Split violins of the goal per "category - subcategory" pair, one half per
# status; catplot creates its own figure.
goal_violin = sns.catplot(
    data=df,
    kind="violin",
    x="Category and subcategory",
    y="Goal",
    hue="Status",
    split=True,
    height=5,
    aspect=5,
)
plt.ylim(0, 70000)
plt.xticks(rotation=90)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48]),
[Text(0, 0, 'Art - Art'),
Text(1, 0, 'Art - Conceptual Art'),
Text(2, 0, 'Art - Digital Art'),
Text(3, 0, 'Art - Illustration'),
Text(4, 0, 'Art - Mixed Media'),
Text(5, 0, 'Art - Painting'),
Text(6, 0, 'Art - Performance Art'),
Text(7, 0, 'Art - Public Art'),
Text(8, 0, 'Art - Sculpture'),
Text(9, 0, 'Comics - Comics'),
Text(10, 0, 'Dance - Dance'),
Text(11, 0, 'Design - Crafts'),
Text(12, 0, 'Design - Design'),
Text(13, 0, 'Design - Graphic Design'),
Text(14, 0, 'Design - Product Design'),
Text(15, 0, 'Fashion - Fashion'),
Text(16, 0, 'Film & Video - Animation'),
Text(17, 0, 'Film & Video - Documentary'),
Text(18, 0, 'Film & Video - Film & Video'),
Text(19, 0, 'Film & Video - Narrative Film'),
Text(20, 0, 'Film & Video - Short Film'),
Text(21, 0, 'Film & Video - Webseries'),
Text(22, 0, 'Food - Food'),
Text(23, 0, 'Games - Board & Card Games'),
Text(24, 0, 'Games - Games'),
Text(25, 0, 'Games - Video Games'),
Text(26, 0, 'Music - Classical Music'),
Text(27, 0, 'Music - Country & Folk'),
Text(28, 0, 'Music - Electronic Music'),
Text(29, 0, 'Music - Hip-Hop'),
Text(30, 0, 'Music - Indie Rock'),
Text(31, 0, 'Music - Jazz'),
Text(32, 0, 'Music - Music'),
Text(33, 0, 'Music - Pop'),
Text(34, 0, 'Music - Rock'),
Text(35, 0, 'Music - World Music'),
Text(36, 0, 'Photography - Photography'),
Text(37, 0, 'Publishing - Art Book'),
Text(38, 0, "Publishing - Children's Book"),
Text(39, 0, 'Publishing - Fiction'),
Text(40, 0, 'Publishing - Journalism'),
Text(41, 0, 'Publishing - Nonfiction'),
Text(42, 0, 'Publishing - Periodical'),
Text(43, 0, 'Publishing - Poetry'),
Text(44, 0, 'Publishing - Publishing'),
Text(45, 0, 'Technology - Open Hardware'),
Text(46, 0, 'Technology - Open Software'),
Text(47, 0, 'Technology - Technology'),
Text(48, 0, 'Theater - Theater')])
# All remaining projects in the Food category.
df.loc[df["Category"] == "Food"]
| Name | Url | Category | Subcategory | Location | Status | Goal | Pledged | Funded percentage | Backers | Funded date | Levels | Reward levels | Updates | Comments | Duration | Difference between goal and pledge | Weekday | Day | Month | Year | Time | Datetime | Hour | Month numeric | Category and subcategory | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 76 | HAPPY AS A CLAM | http://www.kickstarter.com/projects/1114111720... | Food | Food | St Augustine, FL | failed | 80000.0 | 0.0 | 0.000000 | 0 | Tue, 07 Feb 2012 00:09:05 -0000 | 7 | $25,$50,$100,$500,$1,000,$5,000,$7,500 | 0 | 0 | 60.00 | 80000.0 | Tue | 7 | Feb | 2012 | 00:09:05 | 2012-02-07 00:09:05 | 0 | 2 | Food - Food |
| 83 | TOMATOCLAM et al | http://www.kickstarter.com/projects/1795134881... | Food | Food | Virginia Beach, VA | failed | 100000.0 | 0.0 | 0.000000 | 0 | Tue, 07 Feb 2012 21:42:28 -0000 | 2 | $50,$50 | 0 | 0 | 45.00 | 100000.0 | Tue | 7 | Feb | 2012 | 21:42:28 | 2012-02-07 21:42:28 | 21 | 2 | Food - Food |
| 113 | Great Beer Goes Down Easy | http://www.kickstarter.com/projects/1284366959... | Food | Food | Houston, TX | failed | 55000.0 | 2565.0 | 0.046636 | 15 | Fri, 09 Mar 2012 00:00:00 -0000 | 18 | $1,$5,$10,$25,$35,$50,$100,$150,$250,$300,$500... | 7 | 0 | 34.43 | 52435.0 | Fri | 9 | Mar | 2012 | 00:00:00 | 2012-03-09 00:00:00 | 0 | 3 | Food - Food |
| 144 | Robotic First Fruits Farm-Sustainable Food, En... | http://www.kickstarter.com/projects/1651556528... | Food | Food | Germantown, MD | failed | 25000.0 | 362.0 | 0.014480 | 18 | Fri, 24 Feb 2012 17:09:45 -0000 | 26 | $5,$10,$15,$25,$30,$30,$33,$33,$35,$45,$55,$65... | 2 | 0 | 45.00 | 24638.0 | Fri | 24 | Feb | 2012 | 17:09:45 | 2012-02-24 17:09:45 | 17 | 2 | Food - Food |
| 152 | NADIA'S KITCHEN. Moroccan food stand in Brookl... | http://www.kickstarter.com/projects/1877029566... | Food | Food | Brooklyn, NY | failed | 3000.0 | 1005.0 | 0.335000 | 20 | Sun, 18 Mar 2012 05:39:33 -0000 | 8 | $1,$5,$10,$20,$50,$100,$500,$1,000 | 1 | 0 | 11.96 | 1995.0 | Sun | 18 | Mar | 2012 | 05:39:33 | 2012-03-18 05:39:33 | 5 | 3 | Food - Food |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 45841 | Macaron Porn: Parisian Recipes | http://www.kickstarter.com/projects/jennijaime... | Food | Food | Gainesville, FL | failed | 4500.0 | 110.0 | 0.024444 | 4 | Fri, 09 Mar 2012 03:18:36 -0000 | 8 | $10,$25,$50,$75,$125,$200,$500,$1,000 | 2 | 0 | 45.00 | 4390.0 | Fri | 9 | Mar | 2012 | 03:18:36 | 2012-03-09 03:18:36 | 3 | 3 | Food - Food |
| 45842 | Macaron Porn: Parisian Recipes | http://www.kickstarter.com/projects/jennijaime... | Food | Food | Gainesville, FL | failed | 4500.0 | 110.0 | 0.024444 | 4 | Fri, 09 Mar 2012 03:18:36 -0000 | 8 | $10,$25,$50,$75,$125,$200,$500,$1,000 | 2 | 0 | 45.00 | 4390.0 | Fri | 9 | Mar | 2012 | 03:18:36 | 2012-03-09 03:18:36 | 3 | 3 | Food - Food |
| 45843 | Stickmen Brewery & Skewery | http://www.kickstarter.com/projects/1926119894... | Food | Food | Lake Oswego, OR | failed | 30000.0 | 3836.0 | 0.127867 | 42 | Thu, 01 Mar 2012 23:00:00 -0000 | 11 | $5,$15,$25,$50,$100,$250,$500,$1,000,$2,500,$5... | 3 | 2 | 42.24 | 26164.0 | Thu | 1 | Mar | 2012 | 23:00:00 | 2012-03-01 23:00:00 | 23 | 3 | Food - Food |
| 45844 | Stickmen Brewery & Skewery | http://www.kickstarter.com/projects/1926119894... | Food | Food | Lake Oswego, OR | failed | 30000.0 | 3836.0 | 0.127867 | 42 | Thu, 01 Mar 2012 23:00:00 -0000 | 11 | $5,$15,$25,$50,$100,$250,$500,$1,000,$2,500,$5... | 3 | 2 | 42.24 | 26164.0 | Thu | 1 | Mar | 2012 | 23:00:00 | 2012-03-01 23:00:00 | 23 | 3 | Food - Food |
| 45936 | The Best Little Sweet Shop in Vegas: Naked Cit... | http://www.kickstarter.com/projects/781435581/... | Food | Food | Las Vegas, NV | failed | 10000.0 | 252.0 | 0.025200 | 6 | Sat, 24 Mar 2012 01:46:13 -0000 | 19 | $1,$10,$25,$50,$75,$100,$100,$125,$150,$150,$1... | 0 | 0 | 59.96 | 9748.0 | Sat | 24 | Mar | 2012 | 01:46:13 | 2012-03-24 01:46:13 | 1 | 3 | Food - Food |
1298 rows × 26 columns
# Goal per "category - subcategory" pair and status, with pairs ordered from
# the highest to the lowest median goal.
goal_medians = df.groupby("Category and subcategory")["Goal"].median()
my_order = goal_medians.sort_values(ascending=False).index
goal_violin = sns.catplot(
    data=df,
    kind="box",
    x="Category and subcategory",
    y="Goal",
    hue="Status",
    order=my_order,
    showfliers=False,
    height=5,
    aspect=5,
)
plt.ylim(0, 70000)
plt.xticks(rotation=90)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48]),
[Text(0, 0, 'Design - Product Design'),
Text(1, 0, 'Technology - Technology'),
Text(2, 0, 'Technology - Open Hardware'),
Text(3, 0, 'Film & Video - Documentary'),
Text(4, 0, 'Film & Video - Narrative Film'),
Text(5, 0, 'Food - Food'),
Text(6, 0, 'Design - Design'),
Text(7, 0, 'Games - Video Games'),
Text(8, 0, 'Film & Video - Animation'),
Text(9, 0, 'Technology - Open Software'),
Text(10, 0, 'Publishing - Nonfiction'),
Text(11, 0, 'Film & Video - Film & Video'),
Text(12, 0, 'Film & Video - Webseries'),
Text(13, 0, 'Games - Board & Card Games'),
Text(14, 0, 'Fashion - Fashion'),
Text(15, 0, "Publishing - Children's Book"),
Text(16, 0, 'Music - Jazz'),
Text(17, 0, 'Publishing - Publishing'),
Text(18, 0, 'Publishing - Periodical'),
Text(19, 0, 'Music - World Music'),
Text(20, 0, 'Publishing - Art Book'),
Text(21, 0, 'Music - Pop'),
Text(22, 0, 'Publishing - Journalism'),
Text(23, 0, 'Art - Sculpture'),
Text(24, 0, 'Music - Country & Folk'),
Text(25, 0, 'Music - Music'),
Text(26, 0, 'Publishing - Fiction'),
Text(27, 0, 'Art - Mixed Media'),
Text(28, 0, 'Photography - Photography'),
Text(29, 0, 'Film & Video - Short Film'),
Text(30, 0, 'Art - Public Art'),
Text(31, 0, 'Music - Classical Music'),
Text(32, 0, 'Comics - Comics'),
Text(33, 0, 'Music - Hip-Hop'),
Text(34, 0, 'Games - Games'),
Text(35, 0, 'Theater - Theater'),
Text(36, 0, 'Art - Art'),
Text(37, 0, 'Design - Crafts'),
Text(38, 0, 'Dance - Dance'),
Text(39, 0, 'Music - Indie Rock'),
Text(40, 0, 'Music - Rock'),
Text(41, 0, 'Art - Performance Art'),
Text(42, 0, 'Art - Painting'),
Text(43, 0, 'Art - Illustration'),
Text(44, 0, 'Art - Digital Art'),
Text(45, 0, 'Music - Electronic Music'),
Text(46, 0, 'Design - Graphic Design'),
Text(47, 0, 'Publishing - Poetry'),
Text(48, 0, 'Art - Conceptual Art')])
# Median goal per "category - subcategory" pair, successful projects on the
# left and failed projects on the right.
group_col = "Category and subcategory"
# The string "median" selects the built-in groupby median; passing np.median
# as a callable is deprecated by pandas.
success_medians = df_successful.groupby(group_col).agg({"Goal": "median"})
failed_medians = df_failed.groupby(group_col).agg({"Goal": "median"})

# Put both tables on the same (sorted) set of groups before placing them side
# by side, so a pair that is missing from one outcome shows NaN on that side
# instead of shifting every following row against the wrong label.
all_groups = success_medians.index.union(failed_medians.index)
df_median_goal_success = success_medians.reindex(all_groups).reset_index()
df_median_goal_failed = failed_medians.reindex(all_groups).reset_index()

df_median_goal = pd.concat([df_median_goal_success, df_median_goal_failed], axis=1)
df_median_goal
| Category and subcategory | Goal | Category and subcategory | Goal | |
|---|---|---|---|---|
| 0 | Art - Art | 2500.00 | Art - Art | 3316.5 |
| 1 | Art - Conceptual Art | 1500.00 | Art - Conceptual Art | 3500.0 |
| 2 | Art - Digital Art | 1000.00 | Art - Digital Art | 5000.0 |
| 3 | Art - Illustration | 2500.00 | Art - Illustration | 3000.0 |
| 4 | Art - Mixed Media | 2400.00 | Art - Mixed Media | 5000.0 |
| 5 | Art - Painting | 2000.00 | Art - Painting | 3500.0 |
| 6 | Art - Performance Art | 2000.00 | Art - Performance Art | 3000.0 |
| 7 | Art - Public Art | 2500.00 | Art - Public Art | 5000.0 |
| 8 | Art - Sculpture | 2500.00 | Art - Sculpture | 5000.0 |
| 9 | Comics - Comics | 2500.00 | Comics - Comics | 5000.0 |
| 10 | Dance - Dance | 2500.00 | Dance - Dance | 3000.0 |
| 11 | Design - Crafts | 1200.00 | Design - Crafts | 3500.0 |
| 12 | Design - Design | 5000.00 | Design - Design | 9950.0 |
| 13 | Design - Graphic Design | 1500.00 | Design - Graphic Design | 3250.0 |
| 14 | Design - Product Design | 8000.00 | Design - Product Design | 12000.0 |
| 15 | Fashion - Fashion | 3000.00 | Fashion - Fashion | 5000.0 |
| 16 | Film & Video - Animation | 3200.00 | Film & Video - Animation | 10000.0 |
| 17 | Film & Video - Documentary | 6000.00 | Film & Video - Documentary | 10000.0 |
| 18 | Film & Video - Film & Video | 3500.00 | Film & Video - Film & Video | 7900.0 |
| 19 | Film & Video - Narrative Film | 5000.00 | Film & Video - Narrative Film | 11000.0 |
| 20 | Film & Video - Short Film | 2500.00 | Film & Video - Short Film | 4500.0 |
| 21 | Film & Video - Webseries | 3000.00 | Film & Video - Webseries | 5000.0 |
| 22 | Food - Food | 5000.00 | Food - Food | 10000.0 |
| 23 | Games - Board & Card Games | 5000.00 | Games - Board & Card Games | 8000.0 |
| 24 | Games - Games | 2500.00 | Games - Games | 5500.0 |
| 25 | Games - Video Games | 5000.00 | Games - Video Games | 8000.0 |
| 26 | Music - Classical Music | 2750.00 | Music - Classical Music | 5000.0 |
| 27 | Music - Country & Folk | 3000.00 | Music - Country & Folk | 5000.0 |
| 28 | Music - Electronic Music | 2000.00 | Music - Electronic Music | 2500.0 |
| 29 | Music - Hip-Hop | 2100.00 | Music - Hip-Hop | 4000.0 |
| 30 | Music - Indie Rock | 2500.00 | Music - Indie Rock | 3500.0 |
| 31 | Music - Jazz | 3150.00 | Music - Jazz | 6250.0 |
| 32 | Music - Music | 3000.00 | Music - Music | 4000.0 |
| 33 | Music - Pop | 3000.00 | Music - Pop | 5000.0 |
| 34 | Music - Rock | 2400.00 | Music - Rock | 2650.0 |
| 35 | Music - World Music | 3467.17 | Music - World Music | 5000.0 |
| 36 | Photography - Photography | 2500.00 | Photography - Photography | 4000.0 |
| 37 | Publishing - Art Book | 2200.00 | Publishing - Art Book | 5000.0 |
| 38 | Publishing - Children's Book | 3287.50 | Publishing - Children's Book | 5000.0 |
| 39 | Publishing - Fiction | 1500.00 | Publishing - Fiction | 3500.0 |
| 40 | Publishing - Journalism | 2500.00 | Publishing - Journalism | 5000.0 |
| 41 | Publishing - Nonfiction | 3000.00 | Publishing - Nonfiction | 5500.0 |
| 42 | Publishing - Periodical | 2500.00 | Publishing - Periodical | 6000.0 |
| 43 | Publishing - Poetry | 1500.00 | Publishing - Poetry | 3000.0 |
| 44 | Publishing - Publishing | 2500.00 | Publishing - Publishing | 5000.0 |
| 45 | Technology - Open Hardware | 6000.00 | Technology - Open Hardware | 10000.0 |
| 46 | Technology - Open Software | 4048.00 | Technology - Open Software | 8000.0 |
| 47 | Technology - Technology | 6000.00 | Technology - Technology | 13000.0 |
| 48 | Theater - Theater | 2500.00 | Theater - Theater | 4500.0 |
# Disabled experiment: the triple-quoted string below is a bare string
# literal, so none of the plotting code inside it is executed. It holds a
# scatter plot of funded percentage against duration, coloured by category.
"""fig, ax = plt.subplots(figsize=(25,15))
scatter_subcat = sns.scatterplot(data=df, x='Duration', y='Funded percentage', hue='Category', palette="Spectral", size = 0.1)
ax.axhline(y = 1, color='red', linewidth=2, alpha=.7, ls='--')
#plt.xticks(rotation=90)
plt.xlim(0, 50)
plt.ylim(0, 5)
plt.legend(loc='upper left')"""
'fig, ax = plt.subplots(figsize=(25,15))\nscatter_subcat = sns.scatterplot(data=df, x=\'Duration\', y=\'Funded percentage\', hue=\'Category\', palette="Spectral", size = 0.1)\nax.axhline(y = 1, color=\'red\', linewidth=2, alpha=.7, ls=\'--\')\n#plt.xticks(rotation=90)\nplt.xlim(0, 50)\nplt.ylim(0, 5)\nplt.legend(loc=\'upper left\')'
# Normalised histogram of campaign duration for successful projects.
fig, ax = plt.subplots(figsize=(15, 8))
successful_durations = df_successful["Duration"].to_numpy().flatten()
y_hist = ax.hist(
    successful_durations,
    bins=200,
    histtype="stepfilled",
    color="#7EAF92",
    density=True,
    stacked=True,
)
ax.set_xlim(0, 100)
ax.set_xlabel("Duration")
ax.set_ylabel("Density")
Text(0, 0.5, 'Density')
# Normalised histogram of campaign duration for failed projects.
fig, ax = plt.subplots(figsize=(15, 8))
failed_durations = df_failed["Duration"].to_numpy().flatten()
y_hist = ax.hist(
    failed_durations,
    bins=200,
    histtype="stepfilled",
    color="red",
    density=True,
    stacked=True,
)
ax.set_xlim(0, 100)
ax.set_xlabel("Duration")
ax.set_ylabel("Density")
Text(0, 0.5, 'Density')
# Campaign duration per "category - subcategory" pair and status, as box
# plots without outlier markers.
goal_violin = sns.catplot(
    data=df,
    kind="box",
    x="Category and subcategory",
    y="Duration",
    hue="Status",
    showfliers=False,
    height=10,
    aspect=5,
)
plt.ylim(0, 100)
plt.xticks(rotation=90)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48]),
[Text(0, 0, 'Art - Art'),
Text(1, 0, 'Art - Conceptual Art'),
Text(2, 0, 'Art - Digital Art'),
Text(3, 0, 'Art - Illustration'),
Text(4, 0, 'Art - Mixed Media'),
Text(5, 0, 'Art - Painting'),
Text(6, 0, 'Art - Performance Art'),
Text(7, 0, 'Art - Public Art'),
Text(8, 0, 'Art - Sculpture'),
Text(9, 0, 'Comics - Comics'),
Text(10, 0, 'Dance - Dance'),
Text(11, 0, 'Design - Crafts'),
Text(12, 0, 'Design - Design'),
Text(13, 0, 'Design - Graphic Design'),
Text(14, 0, 'Design - Product Design'),
Text(15, 0, 'Fashion - Fashion'),
Text(16, 0, 'Film & Video - Animation'),
Text(17, 0, 'Film & Video - Documentary'),
Text(18, 0, 'Film & Video - Film & Video'),
Text(19, 0, 'Film & Video - Narrative Film'),
Text(20, 0, 'Film & Video - Short Film'),
Text(21, 0, 'Film & Video - Webseries'),
Text(22, 0, 'Food - Food'),
Text(23, 0, 'Games - Board & Card Games'),
Text(24, 0, 'Games - Games'),
Text(25, 0, 'Games - Video Games'),
Text(26, 0, 'Music - Classical Music'),
Text(27, 0, 'Music - Country & Folk'),
Text(28, 0, 'Music - Electronic Music'),
Text(29, 0, 'Music - Hip-Hop'),
Text(30, 0, 'Music - Indie Rock'),
Text(31, 0, 'Music - Jazz'),
Text(32, 0, 'Music - Music'),
Text(33, 0, 'Music - Pop'),
Text(34, 0, 'Music - Rock'),
Text(35, 0, 'Music - World Music'),
Text(36, 0, 'Photography - Photography'),
Text(37, 0, 'Publishing - Art Book'),
Text(38, 0, "Publishing - Children's Book"),
Text(39, 0, 'Publishing - Fiction'),
Text(40, 0, 'Publishing - Journalism'),
Text(41, 0, 'Publishing - Nonfiction'),
Text(42, 0, 'Publishing - Periodical'),
Text(43, 0, 'Publishing - Poetry'),
Text(44, 0, 'Publishing - Publishing'),
Text(45, 0, 'Technology - Open Hardware'),
Text(46, 0, 'Technology - Open Software'),
Text(47, 0, 'Technology - Technology'),
Text(48, 0, 'Theater - Theater')])
# Side-by-side duration counts for successful and failed projects, 45 bins.
fig, ax = plt.subplots(figsize=(15, 8))
labels = ["Successful", "Failed"]
duration_both = [df_successful["Duration"], df_failed["Duration"]]
ax.hist(duration_both, bins=45, density=False)
ax.set_xlim(0, 100)
ax.xaxis.set_major_locator(plt.MaxNLocator(20))
ax.set_xlabel("Duration")
ax.set_ylabel("Count")
ax.legend(labels)
<matplotlib.legend.Legend at 0x23503592550>
# Same successful-vs-failed duration comparison, collapsed into three wide
# bins (min..45, 45..58, 58..max) and drawn upside down with the x-axis
# ticks on top of the figure.
fig, ax = plt.subplots(figsize=(15, 8))
duration_both = [df_successful["Duration"], df_failed["Duration"]]
labels = ["Successful", "Failed"]
duration_hist_2 = ax.hist(
    duration_both,
    bins=[df["Duration"].min(), 45, 58, df["Duration"].max()],
    density=False,
)
# Flip the plot: counts grow downwards and the x ticks move to the top edge.
ax.invert_yaxis()
ax.xaxis.tick_top()
ax.set_xlim(0, 100)
ax.xaxis.set_major_locator(plt.MaxNLocator(10))
ax.legend(labels, loc="lower right")
# No x-axis label is set for this figure; only the y-axis is labelled.
ax.set_ylabel("Count")
Text(0, 0.5, 'Count')
# Disabled experiment: the text below is a FuncAnimation sine-wave demo that
# would save 'sine_wave.gif'. It is wrapped in a bare triple-quoted string, so
# none of it executes; as the last expression of the cell the string itself is
# simply echoed back by the notebook.
"""from matplotlib.animation import FuncAnimation
#plt.style.use('seaborn-pastel')
fig, ax = plt.subplots(figsize=(15,8))
plt.xlim(0,100)
ax.xaxis.set_major_locator(plt.MaxNLocator(10))
line, = ax.plot([], [], lw=3)
def init():
line.set_data([], [])
return line,
def animate(i):
x = np.linspace(0, 4, 1000)
y = np.sin(2 * np.pi * (x - 0.01 * i))
line.set_data(x, y)
return line,
anim = FuncAnimation(fig, animate, init_func=init,
frames=200, interval=20, blit=True)
anim.save('sine_wave.gif', writer='imagemagick')"""
"from matplotlib.animation import FuncAnimation\n#plt.style.use('seaborn-pastel')\n\n\nfig, ax = plt.subplots(figsize=(15,8))\nplt.xlim(0,100)\nax.xaxis.set_major_locator(plt.MaxNLocator(10))\n\nline, = ax.plot([], [], lw=3)\n\ndef init():\n line.set_data([], [])\n return line,\ndef animate(i):\n x = np.linspace(0, 4, 1000)\n y = np.sin(2 * np.pi * (x - 0.01 * i))\n line.set_data(x, y)\n return line,\n\nanim = FuncAnimation(fig, animate, init_func=init,\n frames=200, interval=20, blit=True)\n\n\nanim.save('sine_wave.gif', writer='imagemagick')"
# Funding goal against campaign duration, one point per project, coloured
# by project status.
fig, ax = plt.subplots(figsize=(15, 8))
goal_duration = sns.scatterplot(
    data=df,
    x="Duration",
    y="Goal",
    hue="Status",
    ax=ax,
)
# Restrict the view to goals up to 400000 and durations up to 100.
ax.set_ylim(0, 400000)
ax.set_xlim(0, 100)
(0.0, 100.0)
# Funded percentage of every project over time, coloured by project status.
fig, ax = plt.subplots(figsize=(25, 15))
sns.scatterplot(
    x="Datetime",
    y="Funded percentage",
    hue="Status",
    data=df,
    ax=ax,
)
# Cap the y-axis at 5 (i.e. 500 % funded) and turn the date ticks vertical.
ax.set_ylim(0, 5)
plt.xticks(rotation=90)
(array([14365., 14488., 14610., 14730., 14853., 14975., 15095., 15218.,
15340., 15461.]),
[Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, '')])
# Project volume over time: a cumulative step curve of all projects, plus
# per-period bars of all projects (red) with the successful ones (green)
# drawn on top, so the red that stays visible is the non-successful share.
fig, ax = plt.subplots(figsize=(8, 8))

# Cumulative count of all projects.
plt.hist(df["Datetime"], bins=1000,
         histtype="step", cumulative=True, color='#027173')

# Shared bin edges for both bar histograms: first date, the start of 2010,
# 2011 and 2012, and the last date.
bins = [
    df["Datetime"].min(),
    pd.to_datetime("1-1-2010"),
    pd.to_datetime("1-1-2011"),
    pd.to_datetime("1-1-2012"),
    df["Datetime"].max(),
]

# All projects per period (red).
plt.hist(df["Datetime"], bins=bins, histtype="bar", color="#fc8b8b")
# Successful projects per period (green), overlaid on the red bars.
plt.hist(df_successful["Datetime"], bins=bins, histtype="bar", color="#7EAF92")

plt.xticks(rotation=90)
# Legend labels are assigned to the artists in drawing order: cumulative
# curve, red bars, green bars. The green bars are the successful projects,
# so "Successful" must come last (the original order swapped the two).
labels = ["Total Projects", "Failed", "Successful"]
plt.legend(labels, loc="upper left")
plt.xlabel("Time")
plt.ylabel("Number of projects")
# Ends the cell on a statement with no return value to echo.
print()
# Distribution of "Funded percentage" among successful projects, one box per
# year, with outliers hidden.
fig, ax = plt.subplots(figsize=(4, 8))
box_subcat = sns.boxplot(
    x='Year',
    y='Funded percentage',
    data=df_successful,
    palette="Spectral",
    dodge=False,
    showfliers=False,
    ax=ax,
)
# Dashed reference line at 1, i.e. exactly 100 % funded.
ax.axhline(y=1, ls='--', linewidth=2, color='grey', alpha=.7)
ax.set_ylabel("Funded percentage for successful projects")
ax.set_ylim(1, 2.1)
(1.0, 2.1)
# Projects per calendar month: all projects (red) with the successful ones
# (green) drawn on top, so the red that stays visible is the non-successful
# share.
fig, ax = plt.subplots(figsize=(8, 8))

# Month names for the tick labels. Defined here so this cell does not rely on
# another cell having been run first to create `months`.
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# One unit-wide bin centred on each month number (0.5-1.5, ..., 11.5-12.5),
# shared by both histograms so the bars line up with the ticks at 1..12 and
# with each other. Assumes "Month numeric" holds integers 1-12 -- TODO confirm.
month_bins = np.arange(0.5, 13.5, 1)

# All projects per month (red).
plt.hist(df["Month numeric"], bins=month_bins, histtype="bar", color="#fc8b8b")
# Successful projects per month (green), overlaid on the red bars.
plt.hist(df_successful["Month numeric"], bins=month_bins, histtype="bar", color="#7EAF92")

# Labels follow drawing order: red bars first, green bars second.
labels = ["Failed", "Successful"]
plt.legend(labels, loc="upper left")
plt.xlabel("Month")
plt.ylabel("Number of projects")
# Set tick positions and their labels in one call so they cannot get out of sync.
plt.xticks(np.arange(1, 13, 1), months, rotation=90)
# Ends the cell on a statement with no return value to echo.
print()
# Per-month summary table: number of projects (all / successful / failed)
# and the rounded percentage of successes and failures within each month.
month_count_total = df["Month"].value_counts()
month_count_successful = df_successful["Month"].value_counts()
month_count_failed = df_failed["Month"].value_counts()

# Name the three count columns explicitly; without this they all inherit the
# same name from value_counts() and cannot be told apart.
column = ["Total", "Successful", "Failed"]
month_all_counts = pd.concat(
    [month_count_total, month_count_successful, month_count_failed],
    axis=1,
    keys=column,
)

# Shares are computed from the original Series, aligned on the month label.
month_all_counts["Successful Percentage"] = round(month_count_successful/month_count_total*100).astype("int64")
month_all_counts["Failed Percentage"] = round(month_count_failed/month_count_total*100).astype("int64")
# Constant 100 column, used as the full-height background bar when plotting.
month_all_counts["Total Percentage"] = 100

# value_counts() orders by frequency; put the rows back in calendar order.
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
month_all_counts = month_all_counts.reindex(months)
month_all_counts
| Month | Month | Month | Successful Percentage | Failed Percentage | Total Percentage | |
|---|---|---|---|---|---|---|
| Jan | 3105 | 1659 | 1446 | 53 | 47 | 100 |
| Feb | 3233 | 1764 | 1469 | 55 | 45 | 100 |
| Mar | 4559 | 2592 | 1967 | 57 | 43 | 100 |
| Apr | 5126 | 2929 | 2197 | 57 | 43 | 100 |
| May | 5708 | 3044 | 2664 | 53 | 47 | 100 |
| Jun | 2848 | 1536 | 1312 | 54 | 46 | 100 |
| Jul | 2847 | 1511 | 1336 | 53 | 47 | 100 |
| Aug | 3044 | 1666 | 1378 | 55 | 45 | 100 |
| Sep | 2755 | 1496 | 1259 | 54 | 46 | 100 |
| Oct | 2801 | 1530 | 1271 | 55 | 45 | 100 |
| Nov | 2640 | 1517 | 1123 | 57 | 43 | 100 |
| Dec | 3299 | 1725 | 1574 | 52 | 48 | 100 |
# Success vs. failure share per month: a full-height (100 %) red bar for each
# month with the green success percentage drawn over it, so the red that
# remains visible is the failure share.
fig, ax = plt.subplots(figsize=(8, 8))
labels = ["Failed", "Successful"]
ax.bar(month_all_counts.index, month_all_counts["Total Percentage"], color="#fc8b8b")
ax.bar(month_all_counts.index, month_all_counts["Successful Percentage"], color="#7EAF92")
ax.set_ylim(0, 100)
ax.yaxis.set_major_locator(plt.MaxNLocator(10))
# Labels follow drawing order: red bars first, green bars second.
ax.legend(labels)
plt.xticks(rotation=90)
ax.set_xlabel("Months")
ax.set_ylabel("Percentage of success and failure")
Text(0, 0.5, 'Percentage of success and failure')
# Distribution of "Funded percentage" among successful projects, one box per
# month in the order given by `months`, with outliers hidden.
fig, ax = plt.subplots(figsize=(4, 8))
box_subcat = sns.boxplot(
    x='Month',
    y='Funded percentage',
    data=df_successful,
    order=months,
    palette="Spectral",
    dodge=False,
    showfliers=False,
    ax=ax,
)
# Dashed reference line at 1, i.e. exactly 100 % funded.
ax.axhline(y=1, ls='--', linewidth=2, color='grey', alpha=.7)
ax.set_ylabel("Funded percentage for successful projects")
ax.set_ylim(1, 1.9)
plt.xticks(rotation=90)
# Ends the cell on a statement with no return value to echo.
print()